Functional CpG annotation


Illumina

Example of annotated CpGs

Full anno

delta.meqtl.cpg.anno.df$Relation_to_Island <- factor(delta.meqtl.cpg.anno.df$Relation_to_Island, levels = c("N_Shelf",
    "N_Shore", "Island", "S_Shore", "S_Shelf", "OpenSea"))
delta.meqtl.cpg.anno.df[["Model"]] <- as.factor("Delta")

veh.meqtl.cpg.anno.df$Relation_to_Island <- factor(veh.meqtl.cpg.anno.df$Relation_to_Island, levels = c("N_Shelf",
    "N_Shore", "Island", "S_Shore", "S_Shelf", "OpenSea"))
veh.meqtl.cpg.anno.df[["Model"]] <- as.factor("Baseline")

dex.meqtl.cpg.anno.df$Relation_to_Island <- factor(dex.meqtl.cpg.anno.df$Relation_to_Island, levels = c("N_Shelf",
    "N_Shore", "Island", "S_Shore", "S_Shelf", "OpenSea"))
dex.meqtl.cpg.anno.df[["Model"]] <- as.factor("Dex")

meqtl.cpg.anno.df <- rbind(delta.meqtl.cpg.anno.df, veh.meqtl.cpg.anno.df, dex.meqtl.cpg.anno.df)

head(meqtl.cpg.anno.df, 100)

Closest genes

head(cpg.closest.genes.dist.df, 100)

Chromosomes

library(GenomicFeatures)
chr.length.tbl <- getChromInfoFromUCSC("hg19")[, c(1, 2)] %>%
    setDT()
chr.length.tbl <- chr.length.tbl[chrom %in% paste0("chr", 1:22)]  #, 'X', 'Y'))]
colnames(chr.length.tbl) <- c("chr", "chr_size")
chr.length.tbl$chr <- as.factor(chr.length.tbl$chr)

chr.order <- paste("chr", c(1:22), sep = "")
meth <- delta.meqtl.cpg.anno.df
meth$chr <- factor(meth$chr, levels = chr.order)
meth <- left_join(meth, chr.length.tbl)

library(janitor)
chr.cpgs.cnt <- tabyl(meth$chr, sort = T)
colnames(chr.cpgs.cnt) <- c("chr", "cnt_cpg", "freq_cpg")

chr.cpgs.cnt <- left_join(chr.cpgs.cnt, chr.length.tbl, by = "chr")
chr.cpgs.cnt$chr <- factor(chr.cpgs.cnt$chr, levels = chr.order)
chr.cpgs.cnt[["freq_chr"]] <- chr.cpgs.cnt$chr_size/sum(chr.cpgs.cnt$chr_size)
chr.cpgs.cnt[["cpg_chr_cnt"]] <- chr.cpgs.cnt$cnt_cpg/chr.cpgs.cnt$freq_chr
chr.cpgs.cnt[["cpg_chr_freq"]] <- chr.cpgs.cnt$cpg_chr_cnt/sum(chr.cpgs.cnt$cpg_chr_cnt)

# Relative freq = (Number of CpGs on the chromosome (i) * (length of chromosomes{1:22} / length of
# the chromosome(i)) ) in %

ggplot(chr.cpgs.cnt, aes(x = chr, y = cpg_chr_freq, fill = chr)) + geom_bar(stat = "identity", position = position_dodge()) +
    geom_text(aes(label = scales::percent(cpg_chr_freq, accuracy = 0.1), y = cpg_chr_freq), stat = "identity",
        vjust = -0.5, size = 3) + scale_y_continuous(labels = scales::percent) + labs(title = "Delta meQTLs CpGs distribution across the chromosomes wieghted by the chromosomes' length",
    x = "Chromosome", y = "Relative frequency") + theme(panel.background = element_blank(), plot.title = element_text(size = 8),
    axis.title = element_text(size = 8), axis.text.x = element_text(angle = 20, hjust = 0.5), legend.position = "none")
CpGs distribution across chromosomes

CpGs distribution across chromosomes

Genomic regions

cbPalette <- c( "#CC6677", "#DDCC77", "#117733", "#AA4499", "#44AA99", "#888888")
plt.axis.size <- 8

ggplot(meqtl.cpg.anno.df, aes(x = Model, fill = Relation_to_Island)) + 
  geom_bar( position = "fill") + 
  geom_text(aes(by = Model, # scales::percent(..count../tapply(..count.., ..x.., sum), accuracy = 0.1), 
                y = (..count..)/sum(..count..)), 
            stat = "prop", 
            position = position_fill(vjust = .5), size = 3) +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "",
       y = "Percentage of CpGs", 
       title = "") +
  theme(legend.position = "right",
        panel.grid.major = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size = 8),
        axis.title = element_text(size = 8),
        axis.text.x = element_text(angle = 0, hjust = 0.5)) +
   scale_fill_manual("Feature", values = cbPalette)
Distribution of locations of the CpGs in different genomic regions based on Illumina EPIC annotation

Distribution of locations of the CpGs in different genomic regions based on Illumina EPIC annotation

cbPalette <- c( "#CC6677", "#DDCC77", "#117733", "#AA4499", "#44AA99", "#888888")

ggplot(meqtl.cpg.anno.df[Model %in% c("Delta", "Baseline")], aes(x = Model, fill = Relation_to_Island)) + 
  geom_bar( position = "fill") + 
  geom_text(aes(by = Model, # scales::percent(..count../tapply(..count.., ..x.., sum), accuracy = 0.1), 
                y = (..count..)/sum(..count..)), 
            stat = "prop", 
            position = position_fill(vjust = .5), size = 3) +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "",
       y = "Percentage of CpGs", 
       title = "") +
  theme(legend.position = "right",
        panel.grid.major = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size = 8),
        axis.title = element_text(size = 8),
        axis.text.x = element_text(angle = 0, hjust = 0.5)) +
   scale_fill_manual("Feature", values = cbPalette)
Distribution of locations of the CpGs in different genomic regions based on Illumina EPIC annotation

Distribution of locations of the CpGs in different genomic regions based on Illumina EPIC annotation

cbPalette <- c( "#CC6677", "#DDCC77", "#117733", "#AA4499", "#44AA99", "#888888")

ggplot(meqtl.cpg.anno.df[Model %in% c("Delta")], aes(x = Model, fill = Relation_to_Island)) + 
  geom_bar( stat = "count") + 
  geom_text(aes(by = Model, # scales::percent(..count../tapply(..count.., ..x.., sum), accuracy = 0.1), 
                label = scales::percent((..count..)/sum(..count..), accuracy = 0.1)), 
                stat = "prop", 
                position = position_stack(vjust = 0.6, ), size = plt.axis.size / 2, color = "white") +
  coord_polar(theta = 'y', start = 0) +
  labs(x = " ",
       y = "Percentage of CpGs", 
       title = "") +
  theme(legend.position = "right",
        legend.title = element_blank(),
        legend.text = element_text(size = plt.axis.size),
        panel.grid.major = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size = 8),
        axis.title = element_text(size = plt.axis.size),
        axis.text.x = element_blank()) +
  scale_fill_manual("Feature", values = cbPalette)
Distribution of locations of delta meQTL CpGs in different genomic regions based on Illumina EPIC annotation

Distribution of locations of delta meQTL CpGs in different genomic regions based on Illumina EPIC annotation

cbPalette <- c( "#CC6677", "#DDCC77", "#117733", "#AA4499", "#44AA99", "#888888")

ggplot(meqtl.unique.cpg.anno.df, aes(x = Model, fill = Relation_to_Island)) + 
  geom_bar( position = "fill") + 
  geom_text(aes(by = Model, # scales::percent(..count../tapply(..count.., ..x.., sum), accuracy = 0.1), 
                y = (..count..)/sum(..count..)), 
            stat = "prop", 
            position = position_fill(vjust = .5), size = 3) +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "",
       y = "Percentage of CpGs", 
       title = "Unique meCpGs") +
  theme(legend.position = "right",
        panel.grid.major = element_blank(),
        panel.background = element_blank(),
        plot.title = element_text(size = 10),
        axis.title = element_text(size = 8),
        axis.text.x = element_text(angle = 0, hjust = 0.5)) +
   scale_fill_manual("Feature", values = cbPalette)
Distribution of locations of the meCpGs in different genomic regions based on Illumina EPIC annotation

Distribution of locations of the meCpGs in different genomic regions based on Illumina EPIC annotation

Gene regions

Annotation with Chipseeker

cbPalette <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", "#44AA99", "#999933",
    "#882255", "#661100", "#6699CC", "#888888")

ggplot(meqtl.snp.anno.stat.df, aes(x = Model, y = Frequency, fill = Feature)) + geom_bar(position = "fill",
    stat = "identity") + geom_text(data = subset(meqtl.snp.anno.stat.df, Frequency > 2), aes(by = Model,
    label = Frequency), position = position_fill(vjust = 0.5), size = 3) + labs(x = "", y = "Percentage of CpGs",
    title = "meCpGs annotation from UCSC for the hg19 genome build using TxDb.Hsapiens.UCSC.hg19.knownGene and ChIPseeker Bioconductor R packages") +
    theme(legend.position = "right", panel.grid.major = element_blank(), panel.background = element_blank(),
        plot.title = element_text(size = 8), axis.title = element_text(size = 8), axis.text.x = element_text(angle = 0,
            hjust = 0.5)) + scale_fill_manual("Feature", values = cbPalette)
Distribution of gene-centric locations of the meCpGs based on Illumina EPIC annotation

Distribution of gene-centric locations of the meCpGs based on Illumina EPIC annotation

cbPalette <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", "#44AA99", "#999933",
    "#882255", "#661100", "#6699CC", "#888888")

ggplot(meqtl.snp.anno.stat.df[meqtl.snp.anno.stat.df$Model == "Delta", ], aes(x = Model, y = Frequency,
    fill = Feature)) + geom_col() + geom_text(data = subset(meqtl.snp.anno.stat.df[meqtl.snp.anno.stat.df$Model ==
    "Delta", ], Frequency > 2), aes(by = Model, label = paste0(Frequency, "%")), position = position_stack(vjust = 0.8,
    ), size = plt.axis.size/2, color = "white") + coord_polar(theta = "y", start = 0) + labs(x = " ",
    y = "Percentage of CpGs", title = "") + theme(legend.position = "right", legend.title = element_blank(),
    legend.text = element_text(size = plt.axis.size), panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 8), axis.title = element_text(size = 8), axis.text.x = element_blank()) +
    scale_fill_manual("Feature", values = cbPalette)
Distribution of gene-centric locations of the delta meCpGs based on Illumina EPIC annotation

Distribution of gene-centric locations of the delta meCpGs based on Illumina EPIC annotation

Enrichment of genomic locations in relation to island


Delta vs Baseline

gen.loc.enrich.perm.rslt[["is_sign"]] <- ifelse(gen.loc.enrich.perm.rslt$p_val_emp < 0.05, "Significant",
    "Non-significant")

ggplot(gen.loc.enrich.perm.rslt, aes(x = Feature, y = or.odds.ratio, fill = is_sign)) + geom_bar(stat = "identity",
    position = position_dodge()) + geom_hline(yintercept = 1, color = "red") + labs(x = "", y = "Odds ratio",
    title = "") + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 8), axis.title = element_text(size = plt.axis.size), axis.text.x = element_text(angle = 0,
        hjust = 0.5, size = plt.axis.size), axis.text.y = element_text(size = plt.axis.size), legend.text = element_text(size = plt.axis.size)) +
    scale_fill_manual("", values = c("#999999", "#009E73"))
Genomic region enrichment for the delta meCpGs.  The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.

Genomic region enrichment for the delta meCpGs. The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.

Delta vs All

gen.loc.enrich.perm.rslt[["is_sign"]] <- ifelse(gen.loc.enrich.perm.rslt$p_val_emp < 0.05, "Significant",
    "Non-significant")

ggplot(gen.loc.enrich.perm.rslt, aes(x = Feature, y = or.odds.ratio, fill = is_sign)) + geom_bar(stat = "identity",
    position = position_dodge()) + geom_hline(yintercept = 1, color = "red") + labs(x = "", y = "Odds ratio",
    title = "") + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 8), axis.title = element_text(size = 8), axis.text.x = element_text(angle = 0,
        hjust = 0.5)) + scale_fill_manual("", values = c("#999999", "#009E73"))
Genomic region enrichment for the delta meCpGs.  The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.

Genomic region enrichment for the delta meCpGs. The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.

Baseline vs All

gen.loc.enrich.perm.rslt[["is_sign"]] <- ifelse(gen.loc.enrich.perm.rslt$p_val_emp < 0.05, "Significant",
    "Non-significant")

ggplot(gen.loc.enrich.perm.rslt, aes(x = Feature, y = or.odds.ratio, fill = is_sign)) + geom_bar(stat = "identity",
    position = position_dodge()) + geom_hline(yintercept = 1, color = "red") + labs(x = "", y = "Odds ratio",
    title = "") + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 8), axis.title = element_text(size = 8), axis.text.x = element_text(angle = 0,
        hjust = 0.5)) + scale_fill_manual("", values = c("#999999", "#009E73"))
Genomic region enrichment for the delta meCpGs.  The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.

Genomic region enrichment for the delta meCpGs. The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.

Enrichment of gene-centric locations


Delta vs Baseline

gen.loc.enrich.perm.rslt[["is_sign"]] <- ifelse(gen.loc.enrich.perm.rslt$p_val_emp < 0.05, "Significant",
    "Non-significant")

ggplot(gen.loc.enrich.perm.rslt, aes(x = Feature, y = odds.ratio, fill = is_sign)) + geom_bar(stat = "identity",
    position = position_dodge()) + geom_hline(yintercept = 1, color = "red") + labs(x = "", y = "Odds ratio",
    title = "") + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 8), axis.title = element_text(size = plt.axis.size), axis.text.x = element_text(angle = 20,
        hjust = 0.5, size = plt.axis.size), axis.text.y = element_text(size = plt.axis.size), legend.text = element_text(size = plt.axis.size)) +
    scale_fill_manual("", values = c("#999999", "#009E73"))
Genomic region enrichment for the delta meCpGs.  The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.

Genomic region enrichment for the delta meCpGs. The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.

Delta vs All

gen.loc.enrich.perm.rslt[["is_sign"]] <- ifelse(gen.loc.enrich.perm.rslt$p_val_emp < 0.05, "Significant",
    "Non-significant")

ggplot(gen.loc.enrich.perm.rslt, aes(x = Feature, y = odds.ratio, fill = is_sign)) + geom_bar(stat = "identity",
    position = position_dodge()) + geom_hline(yintercept = 1, color = "red") + labs(x = "", y = "Odds ratio",
    title = "") + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 8), axis.title = element_text(size = plt.axis.size), axis.text.x = element_text(angle = 20,
        hjust = 0.5, size = plt.axis.size), axis.text.y = element_text(size = plt.axis.size), legend.text = element_text(size = plt.axis.size)) +
    scale_fill_manual("", values = c("#999999", "#009E73"))
Genomic region enrichment for the delta meCpGs.  The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.

Genomic region enrichment for the delta meCpGs. The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.

Baseline vs All

gen.loc.enrich.perm.rslt[["is_sign"]] <- ifelse(gen.loc.enrich.perm.rslt$p_val_emp < 0.05, "Significant",
    "Non-significant")

ggplot(gen.loc.enrich.perm.rslt, aes(x = Feature, y = odds.ratio, fill = is_sign)) + geom_bar(stat = "identity",
    position = position_dodge()) + geom_hline(yintercept = 1, color = "red") + labs(x = "", y = "Odds ratio",
    title = "") + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.background = element_blank(),
    plot.title = element_text(size = 8), axis.title = element_text(size = plt.axis.size), axis.text.x = element_text(angle = 20,
        hjust = 0.5, size = plt.axis.size), axis.text.y = element_text(size = plt.axis.size), legend.text = element_text(size = plt.axis.size)) +
    scale_fill_manual("", values = c("#999999", "#009E73"))
Genomic region enrichment for the delta meCpGs.  The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.

Genomic region enrichment for the delta meCpGs. The Y-axis denotes the fold enrichment/depletion as compared to baseline meCpGs. Green bars indicate significant enrichment/depletion, grey bars non-significant differences based on permutation Fisher-tests empirical P <= 0.05.

ChromHMM Enrichment


Blood

Histone mark enrichment for the GR-induced meCpGs using 15 chromatin states from 18 blood and T- & B-cells types. The first figure denotes the fold enrichment / depletion as compared to baseline meCpGs, the second figure denotes significance based on the empirical P≤0.05 with 1,000 permutations.

enrich.plts$or
The figure denotes the fold enrichment / depletion as compared to baseline meCpGs

The figure denotes the fold enrichment / depletion as compared to baseline meCpGs

enrich.plts$pval
The figure denotes significance based on the empirical P≤0.05 with 1,000 permutations

The figure denotes significance based on the empirical P≤0.05 with 1,000 permutations

IDEAS Enrichment


Blood

Histone mark enrichment for the GR-induced meCpGs using condensed annotation of 20 chromatin states from 18 blood and T- & B-cells types. The first figure denotes the fold enrichment / depletion as compared to baseline meCpGs, the second figure denotes significance based on the empirical P≤0.05 with 1,000 permutations.

enrich.plts$or
The figure denotes the fold enrichment / depletion as compared to baseline meCpGs

The figure denotes the fold enrichment / depletion as compared to baseline meCpGs

enrich.plts$pval
The figure denotes significance based on the empirical P≤0.05 with 1,000 permutations

The figure denotes significance based on the empirical P≤0.05 with 1,000 permutations